### REPLICATION FILE -- MAIN ANALYSIS
### Jonathan Homola
### "The Effects of Women's Descriptive Representation on Government Behavior"
### Legislative Studies Quarterly

## clear environment, set seed, install/load packages
rm(list=ls())
set.seed(12435); options(stringsAsFactors=F)
# install.packages("foreign"); install.packages("stargazer")
# install.packages("survey"); install.packages("xtable")
# install.packages("data.table"); install.packages("pBrackets")
library(foreign); library(stargazer); library(survey); library(psych); library(xtable); library(data.table); library(pBrackets)

# GLM confidence intervals
glm.cis <- function(preds, ses, alpha, df){
  tval = qt((1-alpha)/2, df, lower.tail = F)
  raw_conf = cbind(preds-(tval*ses), preds+(tval*ses))
  trans_conf = exp(raw_conf)/(1+exp(raw_conf))
  out <- cbind(preds, raw_conf, exp(preds)/(1+exp(preds)), trans_conf)
  colnames(out) <- c('eta', 'low.eta', 'hi.eta', 'pred', 'low.pred', 'hi.pred')
  return(out)
}

# calculate R^2 for survey design models
fit.svyglm = function(svyglm, digits=3){
  if(methods::is(svyglm, "svyglm")!=TRUE) message("Warning: Not a svyglm object.")
  r2 = (svyglm$null.deviance - svyglm$deviance) / svyglm$null.deviance 
  adjust = svyglm$df.null / svyglm$df.residual
  value  = 1 - ((1-r2) * adjust)
  results = c(R2=round(r2, digits), adjR2=round(value, digits))
  names(results) = c("R-Squared","     Adjusted R-Squared")
  return(results)
}

## set working directory
setwd(" ... ")

## read in the dataset 
pledges_data <- read.dta("Homola_Pledges.dta")

# create subset variable for Table 2 in Thomson et al
pledges_data$subset1 <- ifelse(pledges_data$govparty==1 & pledges_data$sq==0 & (pledges_data$excllink==0 | is.na(pledges_data$excllink)==T), 1, 0)
df_tab2 <- subset(pledges_data, subset1==1)

# survey weights and design for full data set
design.cab <- svydesign(ids=~idmanifesto, weights=~sampwtmod7770, data=df_tab2)

## models for Table 1 (full results in Appendix Table A.3)
m1a <- svyglm(fulfil2 ~ gtsinmin + gtcomaj + gtcomin + chex + ministry + govlogrilerange +  HHgovpr + presidential + semipres + bicameral + federal + EUmember + growav + xtimeyrs + opppreel + outalways + npledgediv10 + precoagree + degomedlogrile + y1980s + y1990s + y2000s + subset + femaleleader_complete, family=quasibinomial(link="logit"), data=df_tab2, design=design.cab)
m1b <- svyglm(fulfil2 ~ gtsinmin + gtcomaj + gtcomin + chex + ministry + govlogrilerange +  HHgovpr + presidential + semipres + bicameral + federal + EUmember + growav + xtimeyrs + opppreel + outalways + npledgediv10 + precoagree + degomedlogrile + y1980s + y1990s + y2000s + subset + women_cabinet, family=quasibinomial(link="logit"), data=df_tab2, design=design.cab)
m2a <- svyglm(fulfil2 ~ as.factor(parfam) + gtsinmin + gtcomaj + gtcomin + chex + ministry + govlogrilerange +  HHgovpr + presidential + semipres + bicameral + federal + EUmember + growav + xtimeyrs + opppreel + outalways + npledgediv10 + precoagree + degomedlogrile + y1980s + y1990s + y2000s + subset + femaleleader_complete, family=quasibinomial(link="logit"), data=df_tab2, design=design.cab)
m2b <- svyglm(fulfil2 ~ as.factor(parfam) + gtsinmin + gtcomaj + gtcomin + chex + ministry + govlogrilerange +  HHgovpr + presidential + semipres + bicameral + federal + EUmember + growav + xtimeyrs + opppreel + outalways + npledgediv10 + precoagree + degomedlogrile + y1980s + y1990s + y2000s + subset + women_cabinet, family=quasibinomial(link="logit"), data=df_tab2, design=design.cab)
m3a <- svyglm(fulfil2 ~ as.factor(partyid) + gtsinmin + gtcomaj + gtcomin + chex + ministry + govlogrilerange +  HHgovpr + presidential + semipres + bicameral + federal + EUmember + growav + xtimeyrs + opppreel + outalways + npledgediv10 + precoagree + degomedlogrile + y1980s + y1990s + y2000s + subset + femaleleader_complete, family=quasibinomial(link="logit"), data=df_tab2, design=design.cab)
m3b <- svyglm(fulfil2 ~ as.factor(partyid) + gtsinmin + gtcomaj + gtcomin + chex + ministry + govlogrilerange +  HHgovpr + presidential + semipres + bicameral + federal + EUmember + growav + xtimeyrs + opppreel + outalways + npledgediv10 + precoagree + degomedlogrile + y1980s + y1990s + y2000s + subset + women_cabinet, family=quasibinomial(link="logit"), data=df_tab2, design=design.cab)


###
### Table 1 (full results in Appendix Table A.3)
###

stargazer(m1a, m2a, m3a, m1b, m2b, m3b,
          covariate.labels=c("Single-party minority", "Coalition majority", "Coalition minority",
                                            "Chief executive", "Relevant portfolio", "Ideological range",
                                            "Herfindahl index", "Presidentialism", "Semi-presidentialism",
                                            "Bicameralism", "Federalism", "EU member", "GDP growth",
                                            "Duration in years", "Opposition parties with experience",
                                            "Opposition parties without experience", "Number of pledges (/10)",
                                            "Pre-election coalition", "Ideological distance to median legislator",
                                            "1980s", "1990s", "2000s", "Subset of pledges tested",
                                            "Female party leader", "Share of women in cabinet", "Constant"),
          digits=2, star.cutoffs = c(0.05, 0.01, NA), dep.var.labels.include = FALSE, no.space=T,
          dep.var.caption="Outcome variable: Probability of pledge fulfillment",
          omit = "as.factor",
          add.lines = list(c("Thomson et al. covariates", "checkmark", "checkmark", "checkmark", "checkmark", "checkmark", "checkmark"),
                           c("Party family FEs", "", "checkmark", "", "", "checkmark", ""),
                           c("Party FEs", "", "", "checkmark", "", "", "checkmark")))

## check r2
fit.svyglm(m1a)
fit.svyglm(m2a)
fit.svyglm(m3a)
fit.svyglm(m1b)
fit.svyglm(m2b)
fit.svyglm(m3b)

## check number of party programs
aggregate(df_tab2[, 5], list(df_tab2$idmanifesto, df_tab2$year), mean)

## check variation on female party leader with partyid FEs
part1 <- aggregate(df_tab2[, 83], list(df_tab2$partyid, df_tab2$year), mean)
aggregate(part1[, 3], list(part1$Group.1), mean)
## only two parties with change in femaleleader: UK Conservatives and Irish Progressive Democrats


###
### Figure 1
###

### Table 1, Model 2, femlead, party family FE (parfam=60, social democratic party)
m2a <- svyglm(fulfil2 ~ as.factor(parfam) + gtsinmin + gtcomaj + gtcomin + chex + ministry + govlogrilerange +  HHgovpr + presidential + semipres + bicameral + federal + EUmember + growav + xtimeyrs + opppreel + outalways + npledgediv10 + precoagree + degomedlogrile + y1980s + y1990s + y2000s + subset + femaleleader_complete, family=quasibinomial(link="logit"), data=df_tab2, design=design.cab)
newdat_feml <- data.frame(gtsinmin = rep(0, 2), gtcomaj = rep(0, 2), gtcomin = rep(0, 2),
                          chex = rep(1, 2), ministry = rep(1, 2),
                          govlogrilerange = rep(0.4061, 2), HHgovpr = rep(0.7503, 2),
                          presidential = rep(0, 2), semipres = rep(0, 2), bicameral = rep(1, 2),
                          federal = rep(0, 2), EUmember = rep(1, 2), growav = rep(2.455, 2),
                          xtimeyrs = rep(3.707, 2), opppreel = rep(0, 2), outalways = rep(0, 2),
                          npledgediv10 = rep(17.74, 2), precoagree = rep(0, 2),
                          degomedlogrile = rep(0.2491240, 2), y1980s = rep(0, 2),
                          y1990s = rep(0, 2), y2000s = rep(1, 2), subset = rep(0, 2), femaleleader_complete = c(0, 1),
                          parfam = rep(factor(60, levels(as.factor(df_tab2$parfam))), 2))
preds_feml <- predict(m2a, newdata = newdat_feml, se.fit = TRUE)
cis_feml <- round(glm.cis(as.numeric(preds_feml), sqrt(attr(preds_feml, "var")), 0.95, m2a$df.residual),4)

pdf("partyleader.pdf", height=8,width=6)
plot(0:1, cis_feml[,4], type="n",xlab="",ylab="",  yaxt="n", xaxt="n", xlim = c(-0.5,1.5), ylim = c(0.5,1))
segments(0:1, cis_feml[,5], 0:1, cis_feml[,6], lwd=2, col="gray60")
points(0:1, cis_feml[,4], pch=16, cex=3.5)
text(0:1, cis_feml[,4], labels = c("M", "F"), col="white", cex=1.5)
segments(0.05, cis_feml[1,4], 0.95, cis_feml[2,4], lty=2)
brackets(0, cis_feml[1,4]-0.12, 1, cis_feml[1,4]-0.12, h = -.05,  ticks = 0.5, lwd=2)
text(0.5, cis_feml[1,4]-0.2, bquote(hat(y)['F']-hat(y)['M'] ~ '=' ~ .(cis_feml[2,4]-cis_feml[1,4])), cex=1.5)
axis(1, at=0:1, labels = c("Male Leader", "Female Leader"), tck=0.03, cex.axis=1.5, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1)
axis(2, tck=0.03, cex.axis=1.5, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
title(xlab = 'Gender of Party Leader', ylab="Probability of Pledge Fulfillment", line = 2.25, cex.lab=1.5)
dev.off()


### Table 1, Model 5, wcab, party family FE (parfam=60, social democratic party)
# drop covariates that are estimated as NA because of collinearity: presidential + semipres + bicameral + federal 
m2b <- svyglm(fulfil2 ~ as.factor(parfam) + gtsinmin + gtcomaj + gtcomin + chex + ministry + govlogrilerange +  HHgovpr + presidential + semipres + bicameral + federal + EUmember + growav + xtimeyrs + opppreel + outalways + npledgediv10 + precoagree + degomedlogrile + y1980s + y1990s + y2000s + subset + women_cabinet, family=quasibinomial(link="logit"), data=df_tab2, design=design.cab)
newdat_wcabiq <- data.frame(gtsinmin = rep(0, 2), gtcomaj = rep(0, 2), gtcomin = rep(0, 2),
                            chex = rep(1, 2), ministry = rep(1, 2), 
                            govlogrilerange = rep(0.4061, 2), HHgovpr = rep(0.7503, 2),
                            presidential = rep(0, 2), semipres = rep(0, 2), bicameral = rep(1, 2),
                            federal = rep(0, 2), EUmember = rep(1, 2), growav = rep(2.455, 2),
                            xtimeyrs = rep(3.707, 2), opppreel = rep(0, 2), outalways = rep(0, 2),
                            npledgediv10 = rep(17.74, 2), precoagree = rep(0, 2), 
                            degomedlogrile = rep(0.2491240, 2), y1980s = rep(0, 2),
                            y1990s = rep(0, 2), y2000s = rep(1, 2), subset = rep(0, 2),
                            women_cabinet = as.numeric(summary(df_tab2$women_cabinet)[c(2,5)]),
                            parfam = rep(factor(60, levels(as.factor(df_tab2$parfam))), 2))
newdat_wcabf <- data.frame(gtsinmin = rep(0, 200), gtcomaj = rep(0, 200), gtcomin = rep(0, 200),
                           chex = rep(1, 200), ministry = rep(1, 200), 
                           govlogrilerange = rep(0.4061, 200), HHgovpr = rep(0.7503, 200),
                           presidential = rep(0, 200), semipres = rep(0, 200), bicameral = rep(1, 200),
                           federal = rep(0, 200), EUmember = rep(1, 200), growav = rep(2.455, 200),
                           xtimeyrs = rep(3.707, 200), opppreel = rep(0, 200), outalways = rep(0, 200),
                           npledgediv10 = rep(17.74, 200), precoagree = rep(0, 200), 
                           degomedlogrile = rep(0.2491240, 200), y1980s = rep(0, 200),
                           y1990s = rep(0, 200), y2000s = rep(1, 200), subset = rep(0, 200),
                           women_cabinet = seq(0, 0.6, length.out=200),
                           parfam = rep(factor(60, levels(as.factor(df_tab2$parfam))), 200))
preds_wcabiq <- predict(m2b, newdata = newdat_wcabiq, se.fit = TRUE)
preds_wcabf <- predict(m2b, newdata = newdat_wcabf, se.fit = TRUE)
cis_wcabiq <- round(glm.cis(as.numeric(preds_wcabiq), sqrt(attr(preds_wcabiq, "var")), 0.95, m2b$df.residual),4)
cis_wcabf <- round(glm.cis(as.numeric(preds_wcabf), sqrt(attr(preds_wcabf, "var")), 0.95, m2b$df.residual),4)
iqrange <- cbind(summary(df_tab2$women_cabinet)[c(2,5)],cis_wcabiq[2,4] - cis_wcabf[1,4])

mygray = rgb(153, 153, 153, alpha = 200, maxColorValue = 255)
pdf("cabinet.pdf", height=8,width=6)
plot(newdat_wcabf$women_cabinet, cis_wcabf[,4], type="n",xlab="",ylab="",  yaxt="n", xaxt="n",
     ylim = c(0.5,1))
polygon(x = c(newdat_wcabf$women_cabinet, rev(newdat_wcabf$women_cabinet)),
        y = c(cis_wcabf[,5], rev(cis_wcabf[,6])), col = mygray, border = NA)
lines(newdat_wcabf$women_cabinet, cis_wcabf[,4], lwd=2)
rug(df_tab2$women_cabinet)
segments(iqrange[,1], cis_wcabiq[,4], iqrange[,1], rep(0.7,2), lty=2)
segments(iqrange[1,1], 0.7, iqrange[2,1], 0.7, lty = 2)
brackets(iqrange[1,1], 0.68, iqrange[2,1], 0.68, h = -0.05,  ticks = 0.5, lwd=2)
text(abs((iqrange[2,1]-iqrange[1,1])/2)+iqrange[1,1], 0.6, 'Interquartile range', cex=1.5)
text(iqrange[,1], cis_wcabiq[,4]+0.025, round(iqrange[,1],3), cex=1.5)
text(abs((iqrange[2,1]-iqrange[1,1])/2)+iqrange[1,1], 0.56,
     labels=bquote(hat(y)['Q3']-hat(y)['Q1'] ~ '=' ~ .(iqrange[1,2])), cex=1.5)
axis(1, tck=0.03, cex.axis=1.5, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1)
axis(2, tck=0.03, cex.axis=1.5, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
title(xlab = 'Share of Women in Cabinet',
      ylab="Probability of Pledge Fulfillment",
      line = 2.25, cex.lab=1.5)
dev.off()

